* reset the session, turn off -more- pauses, and close any log that is open
* (capture swallows the error raised when no log is open)
clear all
set more off
capture log close

*===============================================================================
*	Getting started
*===============================================================================

*@cd "YOUR WORKING DIRECTORY HERE"
cd "P:\NCERDC-Taylor\data\constructed\"

* main specification, reused by the tables below as:  areg <outcome> ${main_spfn}
*   regressors : c23_bel, c23_bel x retest, and year-specific terms in
*                c23_scal and c23_scal x c23_bel
*   options    : absorbs gbybs; standard errors clustered on c23_scal
* the macro already contains the comma and the options, so any extra sample
* restriction is appended by the caller after a second comma
global main_spfn c.c23_bel c.c23_bel#c.retest ///
				c.( c.c23_scal c.c23_scal#c.c23_bel )#ibn.year ///
				, absorb( gbybs ) vce( cluster c23_scal )

*===============================================================================
*	Figure 1 
*===============================================================================

use effort-evaluation-data , clear

* analysis sample: years 2003-2015, grades 3-7, non-missing std_tp1 and c23_scal
keep if inrange( year , 2003 , 2015 ) ///
	& inrange( test_grade , 3 , 7 ) ///
	& !missing( std_tp1 , c23_scal )

* estimation: every term is interacted with year, 2008 being the base year
areg std_tp1 c.( c.c23_scal##c.c23_bel )##ib2008.year ///
	, absorb( gbybs ) vce( cluster c23_scal )

preserve

* replace the data with one row per year (2003-2015) and fill it from the
* coefficients of the regression above
clear
set obs 13
generate year = 2002 + _n

generate b = .
generate se = .
forvalues yr = 2003/2015 {

	replace b  =  _b[`yr'.year#c.c23_bel] if year == `yr'
	* no standard error (hence no interval) is stored for the 2008 base year
	replace se = _se[`yr'.year#c.c23_bel] if year == `yr' & year != 2008

}

* interval bounds: b +/- 1.96 * se
generate lci = b - 1.96 * se
generate uci = b + 1.96 * se

* plot estimates; the first xline() is a very wide, light-gray line centred
* on 2010.5 that serves as a shaded band
twoway ( rcap uci lci year , lcolor( gs6 ) ) ///
	( scatter b year , mcolor( black ) msymbol( square_hollow ) ) ///
	, ///
	xline( 2010.5 , lwidth( 40 ) lcolor( gs15 ) ) ///
	yline( .04 .02 0 -.02 -.04 , lcolor( gs13 ) lwidth( vthin ) ) ///
	yline( 0 , lpattern( dot ) ) ///
	ytitle( Effect of failing ) ///
	xtitle( Year ) ///
	xscale( range( 2003 2015 ) ) ///
	xlabel( 2003(1)2015 , angle(45) ) ///
	legend( off ) ///
	scheme( s2mono )

graph save figure1.gph , replace

restore

*===============================================================================
*	Table 1 
*===============================================================================

use effort-evaluation-data , clear

estimates clear

* outcomes, in the order they are stored (est1, est2, ...)
local outcomes std_tm1 std_tm1_read ret_in_tm1 female white daysabs eds swd lep

* same right-hand side for every outcome; the c23_scal terms vary by retest
* rather than by year; sample is 2004-2015 with a non-missing std_tm1
foreach y of local outcomes {

	eststo : areg `y' c.c23_bel c.c23_bel#c.retest ///
		c.( c.c23_scal c.c23_scal#c.c23_bel )#ibn.retest ///
		if inrange( year , 2004 , 2015 ) & !missing( std_tm1 ) ///
		, absorb( gbybs ) vce( cluster c23_scal )

}

* output: coefficients and standard errors (3 decimals) for c23_bel and
* c23_bel x retest, plus N
esttab est* using table1-output.csv , replace ///
	b( 3 ) se( 3 ) ///
	keep( c23_bel c.c23_bel#c.retest ) ///
	nostar stats( N , fmt( 0 ) )

*===============================================================================
*	Table 2
*===============================================================================

use effort-evaluation-data , clear

* analysis sample: years 2003-2015, grades 3-7, non-missing std_tp1 and c23_scal
keep if inrange( year , 2003 , 2015 ) ///
	& inrange( test_grade , 3 , 7 ) ///
	& !missing( std_tp1 , c23_scal )

estimates clear

* main estimate
eststo : areg std_tp1 ${main_spfn}

* c23_scal terms vary by retest / non-retest period instead of by year
eststo : areg std_tp1 c.c23_bel c.c23_bel#c.retest ///
	c.( c.c23_scal c.c23_scal#c.c23_bel )#ibn.retest ///
	, absorb( gbybs ) vce( cluster c23_scal )

* c23_scal terms vary by grade-by-year
eststo : areg std_tp1 c.c23_bel c.c23_bel#c.retest ///
	c.( c.c23_scal c.c23_scal#c.c23_bel )#ibn.test_grade#ibn.year ///
	, absorb( gbybs ) vce( cluster c23_scal )

* main regressors, but absorbing gby instead of gbybs
eststo : areg std_tp1 c.c23_bel c.c23_bel#c.retest ///
	c.( c.c23_scal c.c23_scal#c.c23_bel )#ibn.year ///
	, absorb( gby ) vce( cluster c23_scal )

* alternative bandwidths: keep c23_scal within a fraction (.25, .5, .75) of
* its grade-by-year minimum and maximum
foreach stat in min max {
	egen gby_`stat' = `stat'( c23_scal ) , by( test_grade year )
}
foreach share in .25 .5 .75 {

	eststo : areg std_tp1 ${main_spfn} , if inrange( c23_scal , gby_min*`share' , gby_max*`share' )

}

* only 2006-2012
eststo : areg std_tp1 ${main_spfn} , if inrange( year , 2006 , 2012 )

* effect at t+2
eststo : areg std_tp2 ${main_spfn}

* reading estimate: reload the data and rebuild the sample and the
* specification from the _read counterparts of the variables
use effort-evaluation-data , clear

keep if inrange( year , 2003 , 2015 ) ///
	& inrange( test_grade , 3 , 7 ) ///
	& !missing( std_tp1_read , c23_scal_read )

eststo : areg std_tp1_read c.c23_bel_read c.c23_bel_read#c.retest ///
	c.( c.c23_scal_read c.c23_scal_read#c.c23_bel_read )#ibn.year ///
	, absorb( gbybs ) vce( cluster c23_scal_read )

* output
esttab est* using table2-output.csv , replace ///
	b( 3 ) se( 3 ) ///
	keep( c23_bel c.c23_bel#c.retest c23_bel_read c.c23_bel_read#c.retest ) ///
	nostar stats( N , fmt( 0 ) )

*===============================================================================
*	Table 3
*===============================================================================

use effort-evaluation-data , clear

* analysis sample: years 2003-2015, grades 3-7, non-missing std_tp1 and c23_scal
keep if inrange( year , 2003 , 2015 ) ///
	& inrange( test_grade , 3 , 7 ) ///
	& !missing( std_tp1 , c23_scal )

estimates clear

* sample conditions reused below
local g35   inlist( test_grade , 3 , 5 )
local g467  inlist( test_grade , 4 , 6 , 7 )
local y0312 inrange( year , 2003 , 2012 )

* retesting prior to 2009
* grades 3 and 5 in districts which retested failing students in 3 and 5
eststo : areg std_tp1 ${main_spfn} , if `g35' & sem_lea == 0 & `y0312'

	* ... limited to 2008 only
	eststo : areg std_tp1 ${main_spfn} , if `g35' & sem_lea == 0 & year == 2008
	* ... outcome t+2
	eststo : areg std_tp2 ${main_spfn} , if `g35' & sem_lea == 0 & `y0312'

* no retesting prior to 2009
* grades 4, 6, and 7 in districts which retested failing students in 3 and 5
eststo : areg std_tp1 ${main_spfn} , if `g467' & sem_lea == 0 & `y0312'

* grades 3 and 5 in districts which used SEM rule in 3 and 5
eststo : areg std_tp1 ${main_spfn} , if `g35' & sem_lea == 1 & `y0312'

* grades 4, 6, and 7 in districts which used SEM rule in 3 and 5
eststo : areg std_tp1 ${main_spfn} , if `g467' & sem_lea == 1 & `y0312'

* one regression per grade (no additional year restriction)
forvalues grade = 3/7 {
	eststo : areg std_tp1 ${main_spfn} , if test_grade == `grade'
}

* output
esttab est* using table3-output.csv , replace ///
	b( 3 ) se( 3 ) ///
	keep( c23_bel c.c23_bel#c.retest ) ///
	nostar stats( N , fmt( 0 ) )

*===============================================================================
*	Table 4
*===============================================================================

use effort-evaluation-data , clear

* analysis sample: years 2003-2015, grades 3-7, non-missing std_tp1 and c23_scal
keep if inrange( year , 2003 , 2015 ) ///
	& inrange( test_grade , 3 , 7 ) ///
	& !missing( std_tp1 , c23_scal )

estimates clear

* main specification applied to six t+1 outcomes, stored in this order:
*   daysabs_tp1   absences in t+1
*   std_tp1_read  reading test score in t+1
*   pmbs_tp1      mean year t score of t+1 peers
*   pmfb_tp1      proportion t+1 peers failed t test
*   tva_tp1       value-added score of t+1 teacher
*   retained_tp1  retained (same grade t and t+1)
foreach y in daysabs_tp1 std_tp1_read pmbs_tp1 pmfb_tp1 tva_tp1 retained_tp1 {
	eststo : areg `y' ${main_spfn}
}

* heterogeneity by failed/passed reading in t:
* fail_read is 1 when c23_full_scal_read is below 0, 0 otherwise, and
* missing when c23_full_scal_read is missing
generate fail_read = ( c23_full_scal_read < 0 ) if !missing( c23_full_scal_read )
eststo : areg std_tp1 c.( c.c23_bel c.c23_bel#c.retest )#ibn.fail_read ibn.fail_read ///
	c.( c.c23_scal c.c23_scal#c.c23_bel )#ibn.year ///
	, absorb( gbybs ) vce( cluster c23_scal )

* output
esttab est* using table4-output.csv , replace ///
	b( 3 ) se( 3 ) ///
	keep( c23_bel c.c23_bel#c.retest ///
		0.fail_read#c.c23_bel#c.retest 1.fail_read#c.c23_bel#c.retest ) ///
	nostar stats( N , fmt( 0 ) )
	
*===============================================================================
*	Appendix Figure A1
*===============================================================================

use effort-evaluation-data , clear

* density test of c23_full_scal at the cutoff (0); DCdensity is a
* user-written command, and gen() names the five variables it creates
DCdensity c23_full_scal , breakpoint( 0 ) gen( md1 md2 md3 md4 md5 ) b( 1 )

* histogram of c23_full_scal: 2-unit bins, y axis in fractions, dashed line
* at the cutoff
* (x-axis title spelling corrected: "Inital" -> "Initial")
histogram c23_full_scal , fraction width( 2 ) ///
	xline( 0 , lpattern( dash ) ) ///
	ytitle( Fraction of students ) ///
	xtitle( "Initial score, relative to pass/fail cutoff" ) ///
	scheme( s2mono )

graph save figureA1 , replace

*===============================================================================
*	Appendix Figure A2
*===============================================================================

use effort-evaluation-data , clear

* analysis sample: years 2003-2015, grades 3-7, non-missing std_tp1 and c23_scal
keep if inrange( year , 2003 , 2015 ) ///
	& inrange( test_grade , 3 , 7 ) ///
	& !missing( std_tp1 , c23_scal )

* bin (scale score point) means:
* one indicator per distinct value of c23_scal, each interacted with retest;
* the predictions are averaged within c23_scal x c23_bel x retest cells and
* N counts the non-missing predictions in each cell
quietly tabulate c23_scal , generate( Dc23_scal )
areg std_tp1 c.( Dc23_scal* )##c.retest , absorb( gbybs )
predict bm
collapse (mean) bm (count) N = bm , by( c23_scal c23_bel retest )

* slopes and intercepts: frequency-weighted linear fit of the bin means on
* c23_scal, separately for each retest (p) x c23_bel (s) combination
forvalues p = 0/1 {
	forvalues s = 0/1 {
		quietly regress bm c23_scal if retest == `p' & c23_bel == `s' [ fweight = N ]
		local int_p`p's`s' = _b[_cons]
		local slp_p`p's`s' = _b[c23_scal]
	}
}

* x-axis titles below: spelling corrected ("Inital" -> "Initial")

* wide view: c23_scal from -10 to 9

twoway ( scatter bm c23_scal if retest == 1 & inrange( c23_scal , -10 , 9 ) , mcolor( black ) msymbol( square ) ) ///
	( function y = `int_p1s1' + `slp_p1s1'*x , range( -10 0 ) lcolor( black ) lpattern( solid ) ) ///
	( function y = `int_p1s0' + `slp_p1s0'*x , range(   0 9 ) lcolor( black ) lpattern( solid ) ) ///
	, ///
	subtitle( "Retest years" ) ///
	xline( 0 , lcolor( gray ) ) ///
	ytitle( "Math score at t+1" ) ///
	xtitle( "Initial score at t, relative to pass/fail cutoff" ) ///
	xlabel( -10(2)8 ) ///
	legend( off ) ///
	scheme( s2mono ) ///
	name( retest , replace )

twoway ( scatter bm c23_scal if retest == 0 & inrange( c23_scal , -10 , 9 ) , mcolor( gs4 ) msymbol( square_hollow ) ) ///
	( function y = `int_p0s1' + `slp_p0s1'*x , range( -10 0 ) lcolor( gs4 ) lpattern( dash ) ) ///
	( function y = `int_p0s0' + `slp_p0s0'*x , range(   0 9 ) lcolor( gs4 ) lpattern( dash ) ) ///
	, ///
	subtitle( "Non-retest years" ) ///
	xline( 0 , lcolor( gray ) ) ///
	ytitle( "Math score at t+1" ) ///
	xtitle( "Initial score at t, relative to pass/fail cutoff" ) ///
	xlabel( -10(2)8 ) ///
	legend( off ) ///
	scheme( s2mono ) ///
	name( nonretest , replace )

* zoomed view: c23_scal from -3 to 2, same fitted lines

twoway ( scatter bm c23_scal if retest == 1 & inrange( c23_scal , -3 , 2 ) , mcolor( black ) msymbol( square ) ) ///
	( function y = `int_p1s1' + `slp_p1s1'*x , range( -3 0 ) lcolor( black ) lpattern( solid ) ) ///
	( function y = `int_p1s0' + `slp_p1s0'*x , range(  0 2 ) lcolor( black ) lpattern( solid ) ) ///
	, ///
	xline( 0 , lcolor( gray ) ) ///
	ytitle( "Math score at t+1" ) ///
	xtitle( "Initial score at t, relative to pass/fail cutoff" ) ///
	xlabel( -3(1)2 ) ///
	legend( off ) ///
	scheme( s2mono ) ///
	name( retestzoom , replace )

twoway ( scatter bm c23_scal if retest == 0 & inrange( c23_scal , -3 , 2 ) , mcolor( gs4 ) msymbol( square_hollow ) ) ///
	( function y = `int_p0s1' + `slp_p0s1'*x , range( -3 0 ) lcolor( gs4 ) lpattern( dash ) ) ///
	( function y = `int_p0s0' + `slp_p0s0'*x , range(  0 2 ) lcolor( gs4 ) lpattern( dash ) ) ///
	, ///
	xline( 0 , lcolor( gray ) ) ///
	ytitle( "Math score at t+1" ) ///
	xtitle( "Initial score at t, relative to pass/fail cutoff" ) ///
	xlabel( -3(1)2 ) ///
	legend( off ) ///
	scheme( s2mono ) ///
	name( nonretestzoom , replace )

* four panels in one figure
graph combine retest nonretest retestzoom nonretestzoom , scheme( s2mono )

graph save figureA2 , replace

*===============================================================================
*	Appendix Table A1
*===============================================================================

use effort-evaluation-data , clear

* years 2003-2015, grades 3-7
keep if inrange( year , 2003 , 2015 ) & inrange( test_grade , 3 , 7 )

* variables to summarize
local sumvars std      std_tp1      std_tm1 ///
              std_read std_tp1_read std_tm1_read ///
              ret_in_tm1 female white daysabs eds swd lep

* all observations kept above
summarize `sumvars'

* observations with non-missing std_tp1 and c23_scal
summarize `sumvars' if !missing( std_tp1 , c23_scal )

*===============================================================================
*	Appendix Figure C1
*===============================================================================

use effort-evaluation-data , clear

* years 2006-2011, grades 3-7
keep if inrange( year , 2006 , 2011 ) & inrange( test_grade , 3 , 7 )

* standardize scal_tp1 with the 2006 mean and sd of scal for the same test_grade
generate std06_tp1 = .
forvalues grade = 3/7 {
	summarize scal if test_grade == `grade' & year == 2006
	replace std06_tp1 = ( scal_tp1 - r(mean) ) / r(sd) if test_grade == `grade'
}

* average difference pre and post policy (mentioned in Section C.5),
* restricted to plvl 2 and 3
regress std06_tp1 c.retest##i.plvl if inrange( plvl , 2 , 3 )
* retested students
display _b[retest]
* non-retested students
lincom retest + 3.plvl#c.retest

* one row per plvl x year holding the mean standardized t+1 score
collapse (mean) std06_tp1 , by( plvl year )

* 2008 means for plvl 2 and 3, drawn as dotted reference lines in the figure;
* the assert checks that exactly one value is being read for each
forvalues lvl = 2/3 {
	summarize std06_tp1 if plvl == `lvl' & year == 2008
	assert r(min) == r(max)
	local mark`lvl' = r(min)
}

* figure
sort plvl year
twoway ( scatter std06_tp1 year if plvl == 2 , connect(l) msymbol( square         ) mcolor( black ) lcolor( black ) lpattern( solid ) ) ///
	( scatter std06_tp1 year if plvl == 3 , connect(l) msymbol( diamond_hollow ) mcolor( black ) lcolor( black ) lpattern( dash  ) ) ///
	, ///
	ytitle( Math score at t+1 ) ///
	yline( `mark2' `mark3' , lcolor( gray ) lpattern( dot ) ) ///
	xtitle( Year t ) ///
	xline( 2008.9 , lcolor( gray ) ) ///
	legend( order( 1 "Level II year t" 2 "Level III year t" ) rows( 1 ) ) ///
	scheme( s2mono )

graph save figureC1 , replace

*===============================================================================
*	Appendix Table C1
*===============================================================================
*-------------------------------------------------------------------------------
*	Identify LEAs with more or less retention after end of policy in 2010
*	(LEA specific diff-in-RD on retention)
*-------------------------------------------------------------------------------

use effort-evaluation-data , clear

* retained = 1 when next year's test_grade equals this year's, 0 when it
* differs, missing when there is no next-year test_grade
tsset mastid year
generate retained = ( test_grade == f.test_grade ) if !missing( f.test_grade )

keep if inrange( year , 2009 , 2012 ) & inlist( test_grade , 3 , 5 ) & !missing( c23_scal )

* state retention policy turns off after 2010
generate off = inrange( year , 2011 , 2012 )

* drop LEAs that have < 5 cases in each "cell"
* where the 4 cells are defined by the 2x2 of above/below cutoff x before/after the end of the policy
egen cellN = count( mastid ) , by( lea c23_bel off )
egen min_cellN = min( cellN ) , by( lea )
drop if min_cellN < 5

* one posted row per LEA: the c23_bel x off coefficient and its standard error
tempname poster
tempfile lea_est
postfile `poster' str3( lea ) bo bo_se using `lea_est' , replace

levelsof lea , clean local( lea_codes )

foreach code of local lea_codes {

	areg retained c.( c.c23_scal##c.c23_bel )##c.off ///
		if lea == "`code'" ///
		, absorb( gbybs ) vce( cluster c23_scal )

	post `poster' ( "`code'" ) ( _b[c.c23_bel#c.off] ) ( _se[c.c23_bel#c.off] )

}

postclose `poster'

use `lea_est' , clear

* discard LEAs whose posted standard error is exactly zero
drop if bo_se == 0

* terciles of the LEA-level estimate
xtile ter_change = bo , nquantiles( 3 )

tempfile leachange
save `leachange' , replace

*-------------------------------------------------------------------------------
*	Diff-in-RD estimates by tercile
*-------------------------------------------------------------------------------

use effort-evaluation-data , clear

keep if inrange( year , 2003 , 2015 ) ///
	& inrange( test_grade , 3 , 7 ) ///
	& !missing( std_tp1 , c23_scal )

* attach each LEA's tercile; unmatched rows from the LEA file are not kept
merge m:1 lea using `leachange' , keep( master match ) nogenerate

estimates clear

* main specification within each tercile
forvalues t = 1/3 {

	eststo : areg std_tp1 ${main_spfn} , if ter_change == `t'

}

* output
esttab est* using tableC1-output.csv , replace ///
	b( 3 ) se( 3 ) ///
	keep( c23_bel c.c23_bel#c.retest ) ///
	nostar stats( N , fmt( 0 ) )

*===============================================================================
*	Appendix Table C2
*===============================================================================

use effort-evaluation-data , clear

* years 2009-2012, grades 3-7, non-missing std_tp1 and c23_scal
keep if inrange( year , 2009 , 2012 ) ///
	& inrange( test_grade , 3 , 7 ) ///
	& !missing( std_tp1 , c23_scal )

* difference between std1 and std
generate retgain = std1 - std

estimates clear

* one regression for each value of c23_scal from -1 down to -10
forvalues k = 1/10 {

	local cell c23_scal == -`k'

	eststo : areg std_tp1 c.( c.c23_scal1##c.c23_bel1 ) ///
		if `cell' ///
		, absorb( gbybs ) vce( cluster c23_scal1 )

	* passret: one minus the mean of c23_bel1 in this cell
	summarize c23_bel1 if `cell'
	estadd scalar passret = 1-r(mean)

	* retgain: mean of retgain in this cell
	summarize retgain if `cell'
	estadd scalar retgain = r(mean)

}

* output
esttab est* using tableC2-output.csv , replace ///
	b( 3 ) se( 3 ) ///
	keep( c23_bel1 ) ///
	nostar stats( N passret retgain , fmt( 0 3 3 ) )

*===============================================================================
*	Appendix Table C3
*===============================================================================

use effort-evaluation-data , clear

* covariates: record which values are missing in mi_<var>, then set the
* missing values to 0
foreach v of varlist female white lep eds swd {
	generate mi_`v' = missing( `v' )
	replace `v' = 0 if missing( `v' )
}
local controls c.std_tm1##c.std_tm1 i.test_grade ///
	female mi_female white mi_white lep mi_lep eds mi_eds swd mi_swd

* t+1 teacher
tsset mastid year
generate teachid_tp1 = f.teachid

* districts and grades where there was retesting before and after the retesting policy
keep if inlist( test_grade , 3 , 5 ) & sem_lea == 0
* years in which there was retesting and we have data on it
keep if inrange( year , 2008 , 2012 )
* retested students
keep if c23_bel == 1

* value-added to retest: teachid effect from a regression of retgain
* (std1 - std) on the controls
generate retgain = std1 - std
areg retgain `controls' , absorb( teachid )
predict fe , d

estimates clear

* +/- 1 year of policy change
keep if inrange( year , 2008 , 2009 )

* predict t+1 scores with value-added of t teacher, absorbing the t+1 teacher
eststo : areg std_tp1 c.fe##c.retest `controls' , absorb( teachid_tp1 ) vce( cluster teachid )

* same regressors without absorbing the t+1 teacher, on the estimation
* sample of the regression above
eststo : regress std_tp1 c.fe##c.retest `controls' if e(sample) , vce( cluster teachid )

* output
esttab est* using tableC3-output.csv , replace ///
	b( 3 ) se( 3 ) ///
	keep( fe retest c.fe#c.retest ) ///
	nostar stats( N , fmt( 0 ) )

*===============================================================================
*	Appendix Table C4
*===============================================================================

use effort-evaluation-data , clear

* proportion of the test window elapsed
* daysofclass is the modal daysmem within lea x schlcode x year (largest
* mode when there are ties); the formulas use 15 outside 2009-2012 and 22
* within 2009-2012, with the window ending at day 180 in both cases
* NOTE: grouping is written with parentheses, the documented expression
* syntax, instead of square brackets; the arithmetic is unchanged
egen daysofclass = mode( daysmem ) , by( lea schlcode year ) maxmode
generate ptwe = ( daysofclass - ( 180-15 ) ) / 15 if !inrange( year , 2009 , 2012 )
replace  ptwe = ( daysofclass - ( 180-22 ) ) / 22 if  inrange( year , 2009 , 2012 )
* values outside [0,1] are set to missing
replace ptwe = . if !inrange( ptwe , 0 , 1 )

* fail = 1 when c23_full_scal is below 0, missing when c23_full_scal is missing
generate fail = ( c23_full_scal < 0 ) if !missing( c23_full_scal )

* school-by-year cells: share failing and number of students
collapse (mean) pfail = fail (count) Nstu = mastid , by( lea schlcode year retest ptwe )

keep if Nstu >= 100

* school proportion failing prior year
egen school = group( lea schlcode )
tsset school year
generate pfail_tm1 = l.pfail

estimates clear

* +/- 2 years of start, 2007-2010
eststo : areg ptwe c.pfail_tm1##c.retest ///
	if inrange( year , 2007 , 2010 ) ///
	, absorb( year ) vce( cluster school )

* +/- 2 years of end, 2011-2014
eststo : areg ptwe c.pfail_tm1##c.retest ///
	if inrange( year , 2011 , 2014 ) ///
	, absorb( year ) vce( cluster school )

* all years
eststo : areg ptwe c.pfail_tm1##c.retest ///
	if inrange( year , 2004 , 2014 ) ///
	, absorb( year ) vce( cluster school )

* placebo 1: indicator for 2007-2008 within 2005-2008
generate placebo1 = inrange( year , 2007 , 2008 )
eststo : areg ptwe c.pfail_tm1##c.placebo1 ///
	if inrange( year , 2005 , 2008 ) ///
	, absorb( year ) vce( cluster school )

* placebo 2: indicator for 2011-2012 within 2009-2012
generate placebo2 = inrange( year , 2011 , 2012 )
eststo : areg ptwe c.pfail_tm1##c.placebo2 ///
	if inrange( year , 2009 , 2012 ) ///
	, absorb( year ) vce( cluster school )

* output
esttab est* using tableC4-output.csv , replace ///
	b( 3 ) se( 3 ) ///
	keep( *pfail_tm1* ) ///
	nostar stats( N , fmt( 0 ) )

*===============================================================================
*	Appendix Table C5
*===============================================================================

use effort-evaluation-data , clear

tsset mastid year

* class = teacher x grade x year; rows without a teacher id are dropped
keep if !mi( teachid )
egen class_t = group( teachid test_grade year )

* proportion of year t-1 classmates who scored level 2 (failed->retested) in year t-1
gen l2_t = c23_scal < 0
egen cl_t_l2_t = mean( l2_t ) , by( class_t )
gen cl_tm1_l2_tm1 = l.cl_t_l2_t

* the student's own t-1 score, stored as a variable while all years are
* still in memory. The lag operator looks up the t-1 ROW, so evaluating
* l.c23_scal after rows have been dropped returns missing whenever that
* row is gone; the sample filters below therefore use this stored copy
gen double prev_c23_scal = l.c23_scal

* proportion of year t classmates who scored level 2 (failed->retested) in year t-1
* (a missing t-1 score counts as 0 here, as it does with l.c23_scal < 0)
gen l2_tm1 = prev_c23_scal < 0
egen cl_t_l2_tm1 = mean( l2_tm1 ) , by( class_t )

* constant test, t and t-1 in (2006,2012)
keep if inrange( year , 2007 , 2012 )

* t-1 class subject to retest policy
gen post_tm1 = year >= 2010
* t   class subject to retest policy
gen post_t   = year >= 2009

* keep students with a non-missing t-1 score at or above the cutoff
* (c23_scal >= 0 in t-1). Filtering on the stored lag keeps the 2007 rows
* (their 2006 row was dropped by the year filter above) and makes the
* sample depend only on the t-1 score, not on which earlier rows remain
* NOTE(review): this step was previously labelled "only level 2 students",
* which conflicts with the >= 0 condition (level 2 is defined above as
* c23_scal < 0) - confirm the intended sample
keep if prev_c23_scal >= 0 & !mi( prev_c23_scal )

estimates clear

* std on the class-composition measures interacted with the policy
* indicators, controlling for a quadratic in std_tm1 interacted with gby;
* teacher effects absorbed, robust standard errors
eststo: areg std c.cl_tm1_l2_tm1##c.post_tm1                         c.std_tm1##c.std_tm1##i.gby , absorb( teachid ) robust
eststo: areg std                             c.cl_t_l2_tm1##c.post_t c.std_tm1##c.std_tm1##i.gby , absorb( teachid ) robust
eststo: areg std c.cl_tm1_l2_tm1##c.post_tm1 c.cl_t_l2_tm1##c.post_t c.std_tm1##c.std_tm1##i.gby , absorb( teachid ) robust

esttab est* using tableC5-output.csv , replace ///
	b( 3 ) se( 3 ) keep( *cl_tm1_l2_tm1* *cl_t_l2_tm1* ) ///
	nostar stats( N , fmt( 0 ) )
